import os
import openai
import random
import numpy as np
import json
import jsonlines
import time
from tqdm import tqdm
from rank_bm25 import BM25Okapi
import threading

# SECURITY: API keys must never be committed to source control. The key is
# read from the OPENAI_API_KEY environment variable instead.
# NOTE(review): any key that was previously hard-coded in this file should be
# treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

# Worked example of the four-step comparison (Initial Perception, Recognizing
# Incongruity, Contextual Analysis, Linking to the Question) for a
# 'person overboard' item. It is included in the prompt text so the model
# mirrors this answer format; the text is runtime data and must not be edited
# casually.
Example_prompt = '''
Here is the example, please follow this example.
Step 1: Initial Perception
Option A: Captions about a person and a squirrel, in contexts unrelated to water or boating.
Option B: One caption about a man in a boat reaching out, which is closely related to the concept of 'person overboard'.
Which is better? Option B. It contains elements (a boat, a person in the water) that are more directly relevant to the idea of 'person overboard'.

Step 2: Recognizing Incongruity
Option A: No direct incongruity; the captions are ordinary but unrelated to the sentences given.
Option B: No incongruity either, but one caption directly aligns with the concept of a 'person overboard'.
Which is better? Option B. While neither option presents an incongruity, Option B is more congruent with the provided sentences.

Step 3: Contextual Analysis
Option A: Contexts involving a squirrel and a boy, unrelated to 'person overboard'.
Option B: One caption clearly depicts a boating scenario, aligning well with the phrase 'person overboard'.
Which is better? Option B. It contains a caption that directly fits the context of the given sentences, unlike Option A.

Step 4: Linking to the Question
Option A: Fails to link to the question, as neither caption matches the given sentences.
Option B: Directly matches the 'person overboard' concept in one of its captions.
Which is better? Option B. It provides a direct link to the question with its boating scenario, which is absent in Option A.
'''


def ask_gpt4(question, thread_id, file_lock, line, unanswered_questions):
    """Send one prompt to GPT-4 and append the answered record to the output file.

    The request is retried (up to 20 attempts, 0.2 s apart) when the API
    reports a rate limit or a timeout. On success the answer is stored in
    ``line['gpt4_rate']`` and ``line`` is appended as one JSON line to
    ``./gpt4_ans/winoground/anscot/test.jsonl``.

    Args:
        question: Full prompt text sent as the single user message.
        thread_id: Identifier of the calling worker; only recorded alongside
            the question when every attempt fails.
        file_lock: Lock serialising writes to the shared output file.
        line: Record (dict) for this item; mutated to add ``'gpt4_rate'``.
        unanswered_questions: List that receives
            ``(question, thread_id, line)`` when all attempts are exhausted.

    Raises:
        Exception: If the API raises any other ``openai.error.OpenAIError``;
            the original error is chained as ``__cause__``.
    """
    output_path = './gpt4_ans/winoground/anscot/test.jsonl'
    messages = [{"role": "user", "content": question}]
    max_time = 20  # maximum number of attempts, not seconds

    for _ in range(max_time):
        # Keep the try body limited to the network call so that only API
        # failures are classified as retryable / fatal here.
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                max_tokens=1000,
                temperature=1.2,
                messages=messages,
            )
        except (openai.error.RateLimitError, openai.error.Timeout):
            # Transient failure (rate limit or request timeout): wait briefly
            # and try again.
            time.sleep(0.2)
            continue
        except openai.error.OpenAIError as err:
            # Any other API error is not retried; chain the cause so the real
            # reason shows up in the thread's traceback.
            raise Exception("Sorry, a problem happened") from err

        answer = response["choices"][0]["message"]["content"]
        with file_lock:
            line['gpt4_rate'] = answer
            # Make sure the target directory exists so a completed (and
            # billed) answer is not lost to FileNotFoundError.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, 'a') as outfile:
                outfile.write(json.dumps(line) + "\n")
        return

    # Every attempt hit a retryable error: hand the item back to the caller.
    unanswered_questions.append((question, thread_id, line))
        

def read_jsonline(sample_file):
    """Build the "option A" prompt fragment for every record in *sample_file*.

    *sample_file* must expose ``iter()`` yielding mappings with a
    ``'predictions'`` entry. Returns a list of strings, one per record, in
    file order.
    """
    template = '''The option A: %s'''
    return [
        template % (str(record['predictions']),)
        for record in sample_file.iter()
    ]
        
if __name__=="__main__":
    # Script entry point: pair each "option A" caption list with the matching
    # "option B" explanation list, build the comparison prompt, and query
    # GPT-4 with one thread per item.

    # Open both inputs with context managers so the file handles are closed.
    with jsonlines.open('./data/winoground/random_icl/test.jsonl') as caption_file:
        corpus = read_jsonline(caption_file)
    with jsonlines.open('./data/winoground/cb_icl/test.jsonl') as explanation_file:
        explanation_lines = list(explanation_file.iter())

    # Fail before any API call is made if an explanation has no caption
    # partner, instead of hitting an IndexError after threads have started.
    if len(corpus) < len(explanation_lines):
        raise ValueError(
            "caption file has %d records but explanation file has %d"
            % (len(corpus), len(explanation_lines))
        )

    file_lock = threading.Lock()
    threads = []
    unanswered_questions = []
    # The progress total follows the input size rather than a per-dataset
    # hard-coded count.
    with tqdm(desc='Process', unit='it', total=len(explanation_lines)) as pbar:
        for num, line in enumerate(explanation_lines):
            captions = line['labels']
            option_a = corpus[num]
            option_b = '''The option B: %s''' %(str(line['explanations']))
            start_prompt = '''Evaluate the equivalence of the following two captions list for the question "match the captions with the following two sentences %s from captions?" ''' %(str(captions))
            middle_prompt = '''%s; %s. ''' %(option_a, option_b)
            last_prompt = '''Please follow the same four step comparison method (Step 1: Initial Perception; Step 2: Recognizing Incongruity; Step 3: Contextual Analysis; Step 4: Linking to the Question) and analyze in each step which option is better. '''
            content = f'''{start_prompt}{middle_prompt}{last_prompt}{Example_prompt}'''
            thread = threading.Thread(target=ask_gpt4, args=(content, num+1, file_lock, line, unanswered_questions))
            threads.append(thread)
            thread.start()
            # Progress counts dispatched requests, not completed ones.
            pbar.update()

        for thread in threads:
            thread.join()

        if unanswered_questions:
            # One more pass over the items that exhausted their retries.
            # Failures of this pass are collected so they can be reported
            # rather than silently dropped.
            still_unanswered = []
            retry_threads = []
            for question, thread_id, line in unanswered_questions:
                retry_thread = threading.Thread(target=ask_gpt4, args=(question, thread_id, file_lock, line, still_unanswered))
                retry_threads.append(retry_thread)
                retry_thread.start()

            for thread in retry_threads:
                thread.join()

            if still_unanswered:
                failed_ids = sorted(thread_id for _, thread_id, _ in still_unanswered)
                print("Unanswered after retry (item ids): %s" % failed_ids)